{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 挑战 28：地球地表平均气温变化趋势预测分析"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据清洗"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Load the three raw CSV files; row 0 of each file supplies the column names.\n",
    "df_GreenhouseGas = pd.read_csv('GreenhouseGas.csv', header=0)\n",
    "df_GlobalSurfaceTemperature = pd.read_csv('GlobalSurfaceTemperature.csv', header=0)\n",
    "df_CO2ppm = pd.read_csv('CO2ppm.csv', header=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "scrolled": true
   },
   "source": [
    "### 整理对齐数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-index every dataset by a DatetimeIndex parsed from its 'Year' column so that\n",
    "# the three frames can be aligned row-by-row on a shared time axis.\n",
    "gas_columns = ['N2O', 'CH4', 'CO2']\n",
    "df_GreenhouseGas_new = pd.DataFrame(\n",
    "    df_GreenhouseGas[gas_columns].values,\n",
    "    index=pd.to_datetime(df_GreenhouseGas['Year'].astype(str)),\n",
    "    columns=gas_columns,\n",
    ")\n",
    "\n",
    "temperature_columns = ['Median', 'Upper', 'Lower']\n",
    "df_GlobalSurfaceTemperature_new = pd.DataFrame(\n",
    "    df_GlobalSurfaceTemperature[temperature_columns].values,\n",
    "    index=pd.to_datetime(df_GlobalSurfaceTemperature['Year'].astype(str)),\n",
    "    columns=temperature_columns,\n",
    ")\n",
    "\n",
    "# The CO2 concentration is read positionally from the second column of CO2ppm.csv.\n",
    "df_CO2ppm_new = pd.DataFrame(\n",
    "    df_CO2ppm.iloc[:, 1].values,\n",
    "    index=pd.to_datetime(df_CO2ppm['Year'].astype(str)),\n",
    "    columns=['CO2_PPM'],\n",
    ")\n",
    "\n",
    "# Column-wise concat aligns rows on the datetime index (outer join by default).\n",
    "# Resulting column order: N2O, CH4, CO2, CO2_PPM, Median, Upper, Lower.\n",
    "df_merge = pd.concat(\n",
    "    [df_GreenhouseGas_new, df_CO2ppm_new, df_GlobalSurfaceTemperature_new],\n",
    "    axis=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "scrolled": true
   },
   "source": [
    "### 数据填充"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The first four columns of df_merge are the model features.\n",
    "# Missing values are forward-filled first, then back-filled so that leading rows\n",
    "# without an earlier observation also receive a value.\n",
    "# .ffill()/.bfill() replace fillna(method=...), which is deprecated in pandas >= 2.1.\n",
    "feature = df_merge.iloc[:, 0:4].ffill().bfill()\n",
    "\n",
    "# Label-based date slices on the DatetimeIndex include both endpoints.\n",
    "feature_train = feature.loc['1970-01-01':'2010-01-01']\n",
    "feature_test = feature.loc['2011-01-01':'2017-01-01']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "scrolled": true
   },
   "source": [
    "### Median 预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.56795075, 0.59403173, 0.62626189, 0.65107287, 0.68079436,\n",
       "       0.72550582, 0.72550582])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "# Fit a linear regression on the 1970-2010 rows, then predict 'Median' for feature_test.\n",
    "target_Median = df_merge['Median']\n",
    "target_Median_train = target_Median.loc['1970-01-01':'2010-01-01']\n",
    "model_Median = LinearRegression().fit(feature_train, target_Median_train)\n",
    "Median_predictions = model_Median.predict(feature_test)\n",
    "\n",
    "# Bare last expression so the notebook renders the prediction array.\n",
    "Median_predictions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "scrolled": true
   },
   "source": [
    "### Upper 预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.6069707 , 0.63415287, 0.6687714 , 0.69603192, 0.72868776,\n",
       "       0.77781349, 0.77781349])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit a linear regression on the 1970-2010 rows, then predict 'Upper' for feature_test.\n",
    "target_Upper = df_merge['Upper']\n",
    "target_Upper_train = target_Upper.loc['1970-01-01':'2010-01-01']\n",
    "model_Upper = LinearRegression().fit(feature_train, target_Upper_train)\n",
    "Upper_predictions = model_Upper.predict(feature_test)\n",
    "\n",
    "# Bare last expression so the notebook renders the prediction array.\n",
    "Upper_predictions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "scrolled": true
   },
   "source": [
    "### Lower 预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.53187933, 0.55686146, 0.58718339, 0.60984838, 0.63699914,\n",
       "       0.67784333, 0.67784333])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit a linear regression on the 1970-2010 rows, then predict 'Lower' for feature_test.\n",
    "target_Lower = df_merge['Lower']\n",
    "target_Lower_train = target_Lower.loc['1970-01-01':'2010-01-01']\n",
    "model_Lower = LinearRegression().fit(feature_train, target_Lower_train)\n",
    "Lower_predictions = model_Lower.predict(feature_test)\n",
    "\n",
    "# Bare last expression so the notebook renders the prediction array.\n",
    "Lower_predictions"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
